# Appendix C Figure 7
# --- Session setup ---
# Clear the global environment, as this script has always done. Only objects
# are removed; attached packages and options are left as they are.
rm(list = ls())

# Every path below is relative to the folder that contains
# "JOP_Replication_Materials". The historical location stays the default;
# set the JOP_REPLICATION_ROOT environment variable to use another folder
# instead of editing this file. If the configured folder does not exist, the
# current working directory is used as long as it contains the project folder.
project_root <- Sys.getenv(
  "JOP_REPLICATION_ROOT",
  unset = "/Users/John/Dropbox/"
)
if (dir.exists(project_root)) {
  setwd(project_root)
} else if (!dir.exists("JOP_Replication_Materials")) {
  stop(
    "Cannot locate 'JOP_Replication_Materials'. Set JOP_REPLICATION_ROOT to ",
    "the folder that contains it, or start R in that folder.",
    call. = FALSE
  )
}

# --- Load packages ---
library(readxl)
library(readr)
library(tidyverse)
library(ggplot2)
library(cowplot)

# --- Load data ---
# Rows from the "government_procurement" sheet. Columns 7-11 are stacked into
# a single `isic` column, so each input row yields one row per stacked column;
# `pivot_longer()` also adds its default `name` column holding the source
# column name.
# NOTE(review): the stacked columns are selected by position; confirm the
# sheet layout has not changed.
gp <- read_excel(
  "JOP_Replication_Materials/data/raw/dv_clean.xlsx",
  sheet = "government_procurement"
) %>%
  pivot_longer(cols = 7:11, values_to = "isic")

# Keep rows that carry a code and attach the number of rows in each
# isic-year cell. The grouping is dropped again so that later steps (which
# rewrite `isic`) operate on a plain, ungrouped tibble.
combined <- gp %>%
  filter(!is.na(isic)) %>%
  group_by(isic, year) %>%
  mutate(isic_year = n()) %>%
  ungroup()

# Lookup table of codes flagged strategic == 1 in the final dataset:
# one row per distinct code, with a constant indicator column.
df <- read_csv("JOP_Replication_Materials/data/processed/final_dataset.csv")
strategic <- df %>%
  filter(strategic == 1) %>%
  distinct(isic) %>%
  mutate(strategic = 1)

# Processing share by year and code; rows without a code are dropped.
ccd_isic <- read_csv(
  "JOP_Replication_Materials/data/processed/ccd_isic_processing.csv"
) %>%
  dplyr::select(year, isic, processing_share) %>%
  filter(!is.na(isic))

# --- Prepare strategic/non-strategic plot ---
# Both sides of the join get the same key format: character, left-padded
# with "0" to width 4. Padding only one side would leave shorter codes
# (e.g. "111" vs "0111") unmatched, and the rows would silently be counted
# as "Not Strategic".
combined <- combined %>%
  ungroup() %>%
  mutate(isic = str_pad(as.character(isic), 4, pad = "0"))

# De-duplicate after padding so the lookup stays one row per code and the
# join below cannot multiply rows.
strategic <- strategic %>%
  mutate(isic = str_pad(as.character(isic), 4, pad = "0")) %>%
  distinct(isic, .keep_all = TRUE)

# One row per (year, Group) for 1995-2014. `group_year` is the number of
# distinct `desc_chinese` values within each strategic-by-year cell; codes
# absent from the lookup are treated as not strategic (0). Row order is left
# as produced by `distinct()` because the plot labels are positioned by row
# order.
ptplot <- combined %>%
  left_join(strategic, by = "isic") %>%
  mutate(strategic = ifelse(is.na(strategic), 0, strategic)) %>%
  group_by(strategic, year) %>%
  mutate(group_year = n_distinct(desc_chinese)) %>%
  ungroup() %>%
  filter(year >= 1995 & year <= 2014) %>%
  mutate(
    Group = factor(
      ifelse(strategic == 1, "Strategic", "Not Strategic"),
      levels = c("Strategic", "Not Strategic")
    )
  ) %>%
  distinct(year, Group, .keep_all = TRUE)

# --- Strategic vs. Non-Strategic (p1) ---
# Left panel: one line per Group showing `group_year` over time. Both lines
# are black and differ only by line type; the legend is hidden and each line
# is labelled directly at its 2013 point.
# NOTE(review): `hjust` is given as one value per label row, in the row order
# of the filtered data, so it needs exactly two rows for 2013 and depends on
# which Group comes first in `ptplot` - confirm if the data change.
p1 <- ggplot(ptplot, aes(x = year, y = group_year, group = Group)) +
  geom_line(aes(linetype = Group, color = Group)) +
  geom_text(
    data = filter(ptplot, year == 2013),
    aes(label = Group),
    hjust = c(1.55, 1.38),
    vjust = -.5,
    size = 5
  ) +
  scale_linetype_manual(values = c("solid", "dashed")) +
  scale_color_manual(values = rep("black", 2)) +
  scale_y_continuous(limits = c(0, 300)) +
  ggtitle("Strategic vs. Non-Strategic Industries") +
  xlab("Year") +
  ylab("No. of Tech Absorption Policies in Place") +
  theme_bw(base_size = 14) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  )

# --- Prepare processing share plot ---
# Mean processing share per code, plus its position among the quartile
# breaks of those means. The key is first brought to the same format as
# `combined$isic` (character, zero-padded to width 4) so the join below
# neither fails on a type mismatch nor silently misses codes that lost a
# leading zero.
# `findInterval()` against the 0/25/50/75/100% quantiles yields 1-4 for the
# four quartiles and 5 for the maximum itself, so `categorical >= 4` selects
# the top quartile.
ccd_summary <- ccd_isic %>%
  mutate(isic = str_pad(as.character(isic), 4, pad = "0")) %>%
  group_by(isic) %>%
  summarise(mean_share = mean(processing_share, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    median_share = median(mean_share, na.rm = TRUE),
    categorical = findInterval(
      mean_share,
      quantile(mean_share, probs = seq(0, 1, length.out = 5), na.rm = TRUE)
    )
  )

# Rows for 1995-2014 with the strategic indicator attached (0 when the code
# is not in the lookup).
ptplot2 <- combined %>%
  ungroup() %>%
  left_join(strategic, by = "isic") %>%
  mutate(strategic = ifelse(is.na(strategic), 0, strategic)) %>%
  filter(year >= 1995 & year <= 2014)

# One row per (year, group). Rows whose code has no processing-share category
# are dropped. Strategic codes are split into "Intermediate" (top quartile of
# mean processing share) and "Downstream" (all other quartiles).
# `group_category_year` is the number of distinct `desc_chinese` values in
# each year-by-group cell. Row order is left as produced by `distinct()`
# because the plot labels are positioned by row order.
ptplot_ccd <- ptplot2 %>%
  left_join(ccd_summary, by = "isic") %>%
  filter(!is.na(categorical)) %>%
  mutate(
    group = case_when(
      strategic == 1 & categorical < 4 ~ "Downstream",
      strategic == 1 & categorical >= 4 ~ "Intermediate",
      strategic == 0 ~ "Not Strategic"
    )
  ) %>%
  group_by(year, group) %>%
  mutate(group_category_year = n_distinct(desc_chinese)) %>%
  ungroup() %>%
  distinct(year, group, .keep_all = TRUE) %>%
  mutate(
    Group = factor(
      group,
      levels = c("Downstream", "Intermediate", "Not Strategic")
    )
  )



# --- Downstream vs. Intermediate (p2) ---
# Right panel: one line per Group showing `group_category_year` over time,
# distinguished by line type and shade of grey. The legend and the y-axis
# title are hidden; each line is labelled directly at its 2012 point.
# NOTE(review): `hjust`/`vjust` are given as one value per label row, in the
# row order of the filtered data, so they need exactly three rows for 2012 -
# confirm if the data change.
p2 <- ggplot(
  ptplot_ccd,
  aes(x = year, y = group_category_year, group = Group, color = Group)
) +
  geom_line(aes(linetype = Group)) +
  geom_text(
    data = filter(ptplot_ccd, year == 2012),
    aes(label = Group),
    hjust = rep(-0.2, 3),
    vjust = c(-.5, -.5, -1),
    size = 5
  ) +
  scale_linetype_manual(values = c("solid", "dashed", "dotdash")) +
  scale_color_manual(values = c("black", "gray35", "gray50")) +
  scale_y_continuous(limits = c(0, 300)) +
  ggtitle("Downstream vs. Intermediate") +
  xlab("Year") +
  ylab(NULL) +
  theme_bw(base_size = 14) +
  theme(
    legend.position = "none",
    axis.title.y = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )

# --- Combine the plots side-by-side ---
# NOTE(review): for plots laid out in one row, cowplot's alignment guidance
# appears to call for align = "h"; "v" is kept so the layout is unchanged -
# confirm which was intended.
combined_figure <- plot_grid(p1, p2, labels = NULL, nrow = 1, align = "v")

# --- Save as PDF ---
# NOTE(review): the file header says "Figure 7" but the file is written as
# C_figure_8.pdf - confirm which number is correct.
output_pdf <- "JOP_Replication_Materials/appendix/output/C_figure_8.pdf"

# Create the output folder if needed so a fresh checkout does not fail at the
# save step.
dir.create(dirname(output_pdf), recursive = TRUE, showWarnings = FALSE)

ggsave(
  filename = output_pdf,
  plot = combined_figure,
  width = 11,      # Full page width
  height = 5.5,    # Shorter than default
  units = "in"
)

# Opening the PDF is a convenience only: if no viewer can be launched, report
# it and carry on, because the figure has already been written.
tryCatch(
  browseURL(output_pdf),
  error = function(e) {
    message("Saved ", output_pdf, " (could not open it: ",
            conditionMessage(e), ")")
  }
)
